#!/usr/bin/env python3
# vim:fileencoding=utf-8

import sys
import os
import time
import re
import lxml.html
from urllib.parse import unquote as URIunescape
from ard import ard

# Site root: prepended to the relative chapter links in downloadbook() and
# used by main() to validate command-line URLs.
baseurl         = 'http://www.wowstory.com'
# Matches an `ard("<arg1>", "<arg2>"` call in page script text; captures both
# quoted arguments.
ark_re          = re.compile(r'ard\("([^"]+)", "([^"]+)"')
# Matches `unescape("<payload>"`; captures the quoted payload.
ark_var_re      = re.compile(r'unescape\("([^"]+)"')
# Matches `var arkN="<value>"` (N is a single digit); captures the variable
# name and its value.
ark_var_real_re = re.compile(r'var (ark\d)="([^"]+)"')
# Matches `arw("<data>", <name>` in an inline paragraph script; captures the
# quoted data and the second argument up to the closing parenthesis.
script_re       = re.compile(r'arw\("([^"]+)", ([^)]+)')

def downloadpage(pageurl, file):
  """Fetch one article page, decode its text and write it to *file*.

  The last <script> in <head> is unwrapped through two nested ``ard(...)``
  layers; the result contains ``unescape("...")`` payloads which, joined and
  URL-unquoted twice, define the ``arkN`` variables.  Every
  ``<div class="articlepart"><p>`` paragraph is then emitted as: its leading
  text, optionally the ``ard``-decoded text of a single inline ``arw(...)``
  script plus the text following that script, and a blank line.

  The whole page is assembled in memory and written with a single
  ``file.write`` call, so a failure while decoding never leaves a partial
  page in *file* (callers may retry without duplicating text).

  Args:
    pageurl: URL (or anything ``lxml.html.parse`` accepts) of the page.
    file: object with a ``write(str)`` method receiving the decoded text.

  Raises:
    AttributeError: an expected ``ard(...)``/``arw(...)`` pattern was not
      found in the page (the regex match is ``None``).
    KeyError: an inline script refers to an ``arkN`` variable that the page
      header did not define.
  """
  doc = lxml.html.parse(pageurl)
  script = doc.xpath('/html/head/script')[-1].text
  # The header script is wrapped twice: decoding the outer ard(...) call
  # yields text containing another ard(...) call.
  for _ in range(2):
    m = ark_re.search(script)
    script = ard(m.group(1), m.group(2))
  arks = URIunescape(URIunescape(''.join(ark_var_re.findall(script))))
  ark = {m.group(1): m.group(2) for m in ark_var_real_re.finditer(arks)}

  parts = []
  for para in doc.xpath('//div[@class="articlepart"]/p'):
    # lxml reports missing text/tail as None rather than ''.
    parts.append((para.text or '').strip())
    if len(para) == 1:
      inline = para[0]
      m = script_re.match(inline.text)
      parts.append(ard(m.group(1), ark[m.group(2)]))
      parts.append((inline.tail or '').rstrip())
    parts.append('\n\n')
  # Single write: nothing reaches the file unless the whole page decoded.
  file.write(''.join(parts))

def downloadbook(bookurl):
  """Download every chapter of a book into ``<title>.txt``.

  The index page at *bookurl* supplies the title (text of the first <h1>),
  the author (text following the first child of that <h1>) and the chapter
  links (``<div class="articlepart"><p><a>``).  The output file starts with
  title, author and URL, followed by each chapter's link text and its
  decoded content.

  A chapter whose download raises AttributeError or KeyError is retried
  after a one-second pause until it succeeds.  Each attempt writes into a
  scratch buffer that is copied to the output file only on success, so a
  failed attempt cannot leave partial or duplicated text in the book.

  Args:
    bookurl: URL of the book's index page.
  """
  import io  # local import: only needed for the per-attempt scratch buffer

  doc = lxml.html.parse(bookurl)
  titletag = doc.xpath('//h1')[0]
  title = titletag.text
  author = titletag[0].tail
  # The with-block guarantees the file is closed even if a download aborts.
  with open(title + '.txt', 'w') as f:
    print(title, author, bookurl, sep='\n', end='\n\n', file=f)

    tags = doc.xpath('//div[@class="articlepart"]/p/a')
    print('下载《%s》，作者 %s，共 %d 页' % (title, author, len(tags)))
    for a in tags:
      f.write(a.text)
      f.write('\n\n')
      while True:
        page = io.StringIO()
        try:
          print('下载：%s' % a.text)
          downloadpage(baseurl + a.get('href'), page)
        except (AttributeError, KeyError):
          # The page did not have the expected shape; wait and try again.
          time.sleep(1)
          print('retry')
        else:
          f.write(page.getvalue())
          break

def test(url):
  """Download the single page at *url* into ``test.txt`` (debugging aid).

  The file is closed even when ``downloadpage`` raises.
  """
  with open('test.txt', 'w') as f:
    downloadpage(url, f)

def main():
  """Command-line entry: download every book URL given as an argument.

  Exits with status 1 when no URL is supplied and with status 2 when any
  argument does not start with ``baseurl``.  All arguments are validated
  before the first download starts.
  """
  argc = len(sys.argv)
  if argc == 1:
    print('请给出一些 URL 地址')
    sys.exit(1)

  targets = sys.argv[1:]
  # Locate the first argument that is not under the site root, if any.
  invalid = next((t for t in targets if not t.startswith(baseurl)), None)
  if invalid is not None:
    print('参数 %s 不是正确的 URL.' % invalid)
    sys.exit(2)

  for book in targets:
    downloadbook(book)
  print('下载全部完成')

# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__':
  try:
    main()
  except KeyboardInterrupt:
    # Ctrl-C: print a short notice instead of a traceback.
    print('放弃下载')
